from openai import OpenAI
from globle import configs as cfg
import json
import pandas as pd
from tqdm import tqdm
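'''
Goal: classify text with an LLM and measure the accuracy.
Steps:
1. Wrap the LLM API call in a function that takes the text as input.
2. Load the data.
3. Iterate over the data and call the LLM to classify each row.
   Tip: after calling tqdm.pandas(), replace df[X].apply() with
   df[X].progress_apply() to show a progress bar.
4. Compute the accuracy.
5. Save the new data.
'''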


# Chat client pointed at the DeepSeek endpoint; the API key is read from the
# project config module.
client = OpenAI(
    base_url="https://api.deepseek.com",
    api_key=cfg.DEEPSEEK_API_KEY,
)

# Enable tqdm's pandas integration so `progress_apply` is available below.
tqdm.pandas()

def classification(text):
    """Classify one review as positive or negative with the DeepSeek chat model.

    Sends ``text`` as the user message, together with a system prompt that
    asks for a JSON object carrying a ``label`` field (1 = positive,
    0 = negative), and parses the reply.

    Args:
        text: The review text to classify.

    Returns:
        The ``label`` value from the model's JSON reply, converted to ``int``
        when possible so that a reply such as ``"1"`` compares equal to the
        integer ``1``. If the value cannot be converted it is returned as-is.

    Raises:
        json.JSONDecodeError: If the reply content is not valid JSON.
        KeyError: If the parsed reply has no ``label`` field.
    """
    completion = client.chat.completions.create(
        model="deepseek-chat",
        messages=[
            {"role": "system", "content": "请判断该评论情感上属于正面评论还是负面评论,以json返回,字段 -label: 1(正面),0(负面)"},
            {"role": "user", "content": text},
        ],
        # Ask the API for a JSON object so the reply can be parsed directly.
        response_format={"type": 'json_object'},
        # Low temperature keeps the classification output stable between runs.
        temperature=0.1,
    )
    reply_json = completion.choices[0].message.content
    # Named `payload` rather than `dict` to avoid shadowing the builtin.
    payload = json.loads(reply_json)
    label = payload['label']
    # The model may emit the label as a string ("1") or a number (1);
    # normalise to int so equality checks against integer labels work.
    # Fall back to the raw value rather than failing on unexpected output.
    try:
        return int(label)
    except (TypeError, ValueError):
        return label

# Evaluate on the first 30 rows only. `nrows` stops parsing after those rows
# instead of loading the whole file and slicing it afterwards.
df = pd.read_csv("test.csv", nrows=30)

# One LLM request per row; progress_apply (enabled by tqdm.pandas()) shows a
# progress bar while the requests run.
df['label_predict'] = df['text'].progress_apply(classification)

# Save the predictions alongside the original columns.
df.to_csv("test_predict.csv", index=False)

# Accuracy: fraction of rows whose predicted label equals the true label.
acc = (df['label'] == df['label_predict']).mean()
print(f"准确率: {acc:.3f}")

